plus sn10x, SI

Baf53cre ENS neurons, SI data
Nb infection 5d, CTL x5 CKO(Il13ra1-cko) x5

loading 6k cells,
demo called 37,018 cells
plus called 37,898 cells

load dependancies

load 10x data

filt.10x <- Read10X(data.dir = "../output_plus/filtered_feature_bc_matrix/")
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
GEX <- filt.10x$`Gene Expression`
FB <- filt.10x$`Antibody Capture`

check datasets

dim(GEX)
## [1] 32285 37898
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
##         AAACCCAAGACCAGAC-1 AAACCCAAGATACATG-1 AAACCCAAGATGCTGG-1
## Xkr4                     2                  1                  3
## Gm1992                   .                  .                  .
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
##         AAACCCAAGATGGTCG-1 AAACCCAAGCTCTATG-1 AAACCCAAGGCTTAAA-1
## Xkr4                     2                  1                  1
## Gm1992                   1                  .                  1
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
dim(FB)
## [1]    10 37898
FB[,1:6]
## 10 x 6 sparse Matrix of class "dgCMatrix"
##       AAACCCAAGACCAGAC-1 AAACCCAAGATACATG-1 AAACCCAAGATGCTGG-1
## CTL.1                240                183                 86
## CTL.2                 15                  9                  2
## CTL.3                 24                  6                 11
## CTL.4                 46                 26                 17
## CTL.5                107                 83                278
## CKO.1                840                111                 63
## CKO.2                 57                101                 27
## CKO.3                173                128                 54
## CKO.4                450                306                164
## CKO.5                192                130                 77
##       AAACCCAAGATGGTCG-1 AAACCCAAGCTCTATG-1 AAACCCAAGGCTTAAA-1
## CTL.1                122                557                 29
## CTL.2                 11                 34                  5
## CTL.3                 40                 16                  7
## CTL.4                 26                 61                 11
## CTL.5                 38                172                  9
## CKO.1                 89                290                 14
## CKO.2                 64                 67                 11
## CKO.3                 71                312                 14
## CKO.4                219                624                 49
## CKO.5                 78                301                 12
rowSums(FB)
##    CTL.1    CTL.2    CTL.3    CTL.4    CTL.5    CKO.1    CKO.2    CKO.3 
## 10296596   798688   593831  1195269  3221211  6204349  2251808  5080735 
##    CKO.4    CKO.5 
## 12781050  5917221
rowSums(FB>0)
## CTL.1 CTL.2 CTL.3 CTL.4 CTL.5 CKO.1 CKO.2 CKO.3 CKO.4 CKO.5 
## 37898 37669 37757 37891 37894 37896 37888 37897 37897 37893

FB

# build seurat object
FB.seur <- CreateSeuratObject(counts = FB,project = "Nb5d_SI")

FB.seur
## An object of class Seurat 
## 10 features across 37898 samples within 1 assay 
## Active assay: RNA (10 features, 0 variable features)
rownames(FB.seur)
##  [1] "CTL.1" "CTL.2" "CTL.3" "CTL.4" "CTL.5" "CKO.1" "CKO.2" "CKO.3" "CKO.4"
## [10] "CKO.5"

normalization

perform Seurat ’Demultiplexing with hashtag oligos(HTOs)
ref https://satijalab.org/seurat/articles/hashing_vignette.html

# normalize
FB.seur <- NormalizeData(FB.seur)
FB.seur <- FindVariableFeatures(FB.seur, selection.method = "mean.var.plot")
FB.seur <- ScaleData(FB.seur, features = VariableFeatures(FB.seur))
## Centering and scaling data matrix
# independent assay for HTO data  
FB.seur[['HTO']] <- CreateAssayObject(counts = FB)
FB.seur <- NormalizeData(FB.seur, assay = 'HTO', normalization.method = 'CLR')
## Normalizing across features

check demux

first define FB colors based on conditions

color.FB <- ggsci::pal_d3("category20c")(20)[c(1,6,11,16,
                                               2,7,12,17)]
color.ad <- ggsci::pal_igv("default")(49)[1:2]

color.FB <- c(color.ad[1],color.FB[1:4],
              color.ad[2],color.FB[5:8])

level.FB <- c(rownames(FB.seur))

color.FBraw <-  c("#FF6C91","lightgrey",color.FB)
scales::show_col(color.FBraw, ncol = 7)

scales::show_col(color.FB, ncol = 5)

par(mfrow=c(2,5))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])

plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
plot(sort(t(FB)[,9]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[9])
plot(sort(t(FB)[,10]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[10])

par(mfrow=c(2,5))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
plot(sort(t(FB)[,2],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
plot(sort(t(FB)[,3],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
plot(sort(t(FB)[,4],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
plot(sort(t(FB)[,5],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])

plot(sort(t(FB)[,6],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
plot(sort(t(FB)[,7],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
plot(sort(t(FB)[,8],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
plot(sort(t(FB)[,9],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[9])
plot(sort(t(FB)[,10],decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[10])

range ‘q’

q0.50 ~ 0.99
#table.demux
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,5))

plot(table.demux$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux[,grep("CTL|CKO",colnames(table.demux))]), type="p", col="#306000", pch=16, ylab="Singlet")

plot(table.demux$quantile, pch=16, ylab="mod-quantile")
plot.new()

plot(table.demux[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux)[2+1])
plot(table.demux[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux)[2+2])
plot(table.demux[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux)[2+3])
plot(table.demux[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux)[2+4])
plot(table.demux[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux)[2+5])
plot(table.demux[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux)[2+6])
plot(table.demux[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux)[2+7])
plot(table.demux[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux)[2+8])

plot(table.demux[,2+9], type="p", col=color.FB[9], pch=16, ylab=colnames(table.demux)[2+9])
plot(table.demux[,2+10], type="p", col=color.FB[10], pch=16, ylab=colnames(table.demux)[2+10])

q0.9900 ~ 0.9999
options(max.print=5000)
#table.demux.s
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,5))


plot(table.demux.s$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux.s$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux.s[,grep("CTL|CKO",colnames(table.demux.s))]), type="p", col="#306000", pch=16, ylab="Singlet")

plot(table.demux.s$quantile, pch=16, ylab="mod-quantile")
plot.new()

plot(table.demux.s[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux.s)[2+1])
plot(table.demux.s[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux.s)[2+2])
plot(table.demux.s[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux.s)[2+3])
plot(table.demux.s[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux.s)[2+4])
plot(table.demux.s[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux.s)[2+5])
plot(table.demux.s[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux.s)[2+6])
plot(table.demux.s[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux.s)[2+7])
plot(table.demux.s[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux.s)[2+8])

plot(table.demux.s[,2+9], type="p", col=color.FB[9], pch=16, ylab=colnames(table.demux.s)[2+9])
plot(table.demux.s[,2+10], type="p", col=color.FB[10], pch=16, ylab=colnames(table.demux.s)[2+10])

demultiplexing

using q0.9999, tag3 q0.99

# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.99)
## Cutoff for CTL.1 : 209 reads
## Cutoff for CTL.2 : 15 reads
## Cutoff for CTL.3 : 15 reads
## Cutoff for CTL.4 : 34 reads
## Cutoff for CTL.5 : 66 reads
## Cutoff for CKO.1 : 140 reads
## Cutoff for CKO.2 : 62 reads
## Cutoff for CKO.3 : 128 reads
## Cutoff for CKO.4 : 296 reads
## Cutoff for CKO.5 : 170 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    23531     3260    11107
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CTL.5    CKO.1 
##    23531     3260     1672     1660     1276      465     1483     1405 
##    CKO.2    CKO.3    CKO.4    CKO.5 
##     1281      598      476      791
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.999)
## Cutoff for CTL.1 : 279 reads
## Cutoff for CTL.2 : 20 reads
## Cutoff for CTL.3 : 19 reads
## Cutoff for CTL.4 : 45 reads
## Cutoff for CTL.5 : 87 reads
## Cutoff for CKO.1 : 193 reads
## Cutoff for CKO.2 : 85 reads
## Cutoff for CKO.3 : 170 reads
## Cutoff for CKO.4 : 379 reads
## Cutoff for CKO.5 : 236 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    18049     6219    13630
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CTL.5    CKO.1 
##    18049     6219     2010     2134     1482      616     1817     1792 
##    CKO.2    CKO.3    CKO.4    CKO.5 
##     1644      728      614      793
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.9998)
## Cutoff for CTL.1 : 326 reads
## Cutoff for CTL.2 : 23 reads
## Cutoff for CTL.3 : 22 reads
## Cutoff for CTL.4 : 52 reads
## Cutoff for CTL.5 : 102 reads
## Cutoff for CKO.1 : 228 reads
## Cutoff for CKO.2 : 101 reads
## Cutoff for CKO.3 : 199 reads
## Cutoff for CKO.4 : 433 reads
## Cutoff for CKO.5 : 281 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    14794     8544    14560
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CTL.5    CKO.1 
##    14794     8544     2144     2383     1536      684     1930     1954 
##    CKO.2    CKO.3    CKO.4    CKO.5 
##     1713      784      674      758
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.9999)
## Cutoff for CTL.1 : 345 reads
## Cutoff for CTL.2 : 25 reads
## Cutoff for CTL.3 : 23 reads
## Cutoff for CTL.4 : 55 reads
## Cutoff for CTL.5 : 108 reads
## Cutoff for CKO.1 : 243 reads
## Cutoff for CKO.2 : 108 reads
## Cutoff for CKO.3 : 211 reads
## Cutoff for CKO.4 : 456 reads
## Cutoff for CKO.5 : 300 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    13508     9554    14836
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CTL.5    CKO.1 
##    13508     9554     2172     2403     1574      706     1994     1991 
##    CKO.2    CKO.3    CKO.4    CKO.5 
##     1753      783      727      733
# tags q0.9999
# tag10 q0.99

#cutoff.FB <- c(345,25,23,55,108,
#               243,108,211,456,200)


# manual 
cutoff.FB <- c(468,32,28,48,130,
               250,81,264,538,278)
names(cutoff.FB) <- rownames(FB.seur)
cutoff.FB
## CTL.1 CTL.2 CTL.3 CTL.4 CTL.5 CKO.1 CKO.2 CKO.3 CKO.4 CKO.5 
##   468    32    28    48   130   250    81   264   538   278
par(mfrow=c(2,5))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])

plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])

plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])

plot(sort(t(FB)[,9]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[9])
abline(h=cutoff.FB[9],col = color.FB[9])

plot(sort(t(FB)[,10]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[10])
abline(h=cutoff.FB[10],col = color.FB[10])

par(mfrow=c(2,5))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])

plot(sort(t(FB)[,5], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])

plot(sort(t(FB)[,6], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])

plot(sort(t(FB)[,9], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[9])
abline(h=cutoff.FB[9],col = color.FB[9])

plot(sort(t(FB)[,10], decreasing = T),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[10])
abline(h=cutoff.FB[10],col = color.FB[10])

## custom cutoff run this
custom.Demux <- function(FBobj,FBcutoff){
  # define decision matrix
  dd <- FBobj@assays[['HTO']]@counts
  dd <- apply(dd, 2, function(x){
    x > FBcutoff
  })
  x1 <- colSums(dd)
  x1[x1>1] <- "Doublet"
  x1[x1==0] <- "Negative"
  x1[x1==1] <- "Singlet"

  x2 <- x1
  
  bc.slt  <- names(x1)[x1=="Singlet"]
  
  for(bc in bc.slt){
    x2[bc] <- rownames(dd)[dd[,bc]]
  }
  
  FBdf <- data.frame(HTO_classification.global=factor(x1,
                                                      levels = c("Doublet","Negative","Singlet")),
                     hash.ID=factor(x2,
                                    levels = c("Doublet","Negative",rownames(dd))))
  return(FBdf)
}

df.FB <- custom.Demux(FB.seur,cutoff.FB)
## custom cutoff run this
dim(df.FB)
## [1] 37898     2
df.FB[1:10,]
##                    HTO_classification.global  hash.ID
## AAACCCAAGACCAGAC-1                   Singlet    CKO.1
## AAACCCAAGATACATG-1                   Singlet    CKO.2
## AAACCCAAGATGCTGG-1                   Singlet    CTL.5
## AAACCCAAGATGGTCG-1                   Singlet    CTL.3
## AAACCCAAGCTCTATG-1                   Doublet  Doublet
## AAACCCAAGGCTTAAA-1                  Negative Negative
## AAACCCAAGTCTGGTT-1                   Singlet    CKO.1
## AAACCCAAGTGGTTGG-1                  Negative Negative
## AAACCCACAAATGGAT-1                   Singlet    CKO.1
## AAACCCACAAGCCATT-1                   Doublet  Doublet
## custom cutoff run this
table(df.FB)
##                          hash.ID
## HTO_classification.global Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CTL.5 CKO.1
##                  Doublet    10463        0     0     0     0     0     0     0
##                  Negative       0    11638     0     0     0     0     0     0
##                  Singlet        0        0  1691  2303  1404  1053  1946  2323
##                          hash.ID
## HTO_classification.global CKO.2 CKO.3 CKO.4 CKO.5
##                  Doublet      0     0     0     0
##                  Negative     0     0     0     0
##                  Singlet   2684   616   744  1033
## custom cutoff run this
FB.seur@meta.data[,c("HTO_classification.global","hash.ID")] <- df.FB
FB.seur@meta.data[1:4,]
##                    orig.ident nCount_RNA nFeature_RNA nCount_HTO nFeature_HTO
## AAACCCAAGACCAGAC-1    Nb5d_SI       2144           10       2144           10
## AAACCCAAGATACATG-1    Nb5d_SI       1083           10       1083           10
## AAACCCAAGATGCTGG-1    Nb5d_SI        779           10        779           10
## AAACCCAAGATGGTCG-1    Nb5d_SI        758           10        758           10
##                    HTO_maxID HTO_secondID HTO_margin HTO_classification
## AAACCCAAGACCAGAC-1     CKO.1        CTL.3  1.0819386        CKO.1_CTL.3
## AAACCCAAGATACATG-1     CKO.2        CTL.5  0.3712439           Negative
## AAACCCAAGATGCTGG-1     CTL.5        CTL.3  1.0704500              CTL.5
## AAACCCAAGATGGTCG-1     CTL.3        CKO.2  0.4984798              CTL.3
##                    HTO_classification.global hash.ID
## AAACCCAAGACCAGAC-1                   Singlet   CKO.1
## AAACCCAAGATACATG-1                   Singlet   CKO.2
## AAACCCAAGATGCTGG-1                   Singlet   CTL.5
## AAACCCAAGATGGTCG-1                   Singlet   CTL.3
## default q0.99 run this
#FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
#                                            levels = c("Doublet","Negative","Singlet"))
#FB.seur$hash.ID <- factor(as.character(FB.seur$hash.ID),
#                          levels = c("Doublet","Negative",rownames(FB.seur)))
sl_stat <- table(FB.seur$HTO_classification.global)
barplot(sl_stat,ylim = c(0,18500),col = c("#FF6C91","lightgrey","#7CAE00"),main = "Feature Barcode statistics")
text(x=1:3*1.2-0.5,y=sl_stat+1280,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

RidgePlot

with ridge plots, raw
FB.seur@active.ident <- factor(FB.seur$HTO_maxID,levels=rownames(FB.seur))

RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]),same.y.lims= TRUE, ncol=5,
          cols = color.FB) 

with ridge plots, filtered
FB.seur@meta.data$hash.ID.sort <- factor(as.character(FB.seur@meta.data$hash.ID),levels = c("Doublet","Negative",levels(FB.seur@active.ident)))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]), ncol=5,same.y.lims= TRUE,
          cols = c("#FF6C91","lightgrey",color.FB),group.by = "hash.ID.sort") 

tSNE for HTOs

# first, remove negative cells from th object
#FB.seur.subset <- FB.seur

# Calculate a tSNE embedding of the HTO data
#DefaultAssay(FB.seur.subset) <- "HTO"

#FB.seur.subset <- ScaleData(FB.seur.subset, features = rownames(FB.seur.subset), verbose = FALSE)
#FB.seur.subset <- RunPCA(FB.seur.subset, features = rownames(FB.seur.subset), approx = FALSE)
#FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:10, perplexity = 100, check_duplicates = FALSE)


#saveRDS(FB.seur.subset, "FB0207.seur.subset.rds")
FB.seur.subset <- readRDS("FB0207.seur.subset.rds")
multiplot(
DimPlot(FB.seur.subset, order = rev(levels(FB.seur@active.ident)),
        cols = color.FB) + labs(title = "FB Max_ID"), 
DimPlot(FB.seur.subset, order = c("Doublet",rev(levels(FB.seur@active.ident)),"Negative"), group.by = 'hash.ID.sort', label = F,
        cols = c("lightgrey",color.FB,"#FF6C91")) + 
  labs(title = "FB Filtered_ID") + theme(plot.title = element_text(hjust = 0)) ,
cols = 1)

stat

FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
                                            levels = c("Doublet","Negative","Singlet"))
Idents(FB.seur) <- "HTO_classification.global"
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38"))

VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38")) + geom_jitter(alpha=0.015)

table(FB.seur@meta.data$hash.ID.sort)
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CTL.5    CKO.1 
##    10463    11638     1691     2303     1404     1053     1946     2323 
##    CKO.2    CKO.3    CKO.4    CKO.5 
##     2684      616      744     1033
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,14800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:12*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:12*1.2-0.45,y=sl_stat+875,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

GEX

mainly follow https://satijalab.org/seurat/articles/pbmc3k_tutorial.html

GEX.seur <- CreateSeuratObject(counts = GEX,
                               min.cells = 3,
                               min.features = 200,
                               project = "Nb5d_SI")
GEX.seur
## An object of class Seurat 
## 24175 features across 37898 samples within 1 assay 
## Active assay: RNA (24175 features, 0 variable features)

filtering

MT genes

grep("^mt-",rownames(GEX),value = T)
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)

RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)

# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc

add FB info

GEX.seur$FB.info <- FB.seur@meta.data[rownames(GEX.seur@meta.data),"hash.ID"]
#head(GEX.seur@meta.data)
table(GEX.seur$FB.info)
## 
##  Doublet Negative    CTL.1    CTL.2    CTL.3    CTL.4    CTL.5    CKO.1 
##    10463    11638     1691     2303     1404     1053     1946     2323 
##    CKO.2    CKO.3    CKO.4    CKO.5 
##     2684      616      744     1033
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

GEX.seur <- subset(GEX.seur, subset = FB.info!="Doublet" & FB.info!="Negative" & FB.info!="CTL.4" & FB.info!="CKO.3" & FB.info!="CKO.4")
GEX.seur
## An object of class Seurat 
## 24175 features across 13384 samples within 1 assay 
## Active assay: RNA (24175 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

filtering

GEX.seur <- subset(GEX.seur, subset = percent.mt < 3 & nFeature_RNA > 500 & nFeature_RNA < 3600 & nCount_RNA < 9600)
GEX.seur
## An object of class Seurat 
## 24175 features across 13190 samples within 1 assay 
## Active assay: RNA (24175 features, 0 variable features)
filtered obj
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,14800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:12*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:12*1.2-0.45,y=sl_stat+775,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

par(mar=c(6,4,2,3))
sl_stat <- table(GEX.seur$FB.info)
barplot(sl_stat,ylim = c(0,4800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:12*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:12*1.2-0.45,y=sl_stat+375,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

check cellcycle

s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))

GEX.seur <- CellCycleScoring(GEX.seur,
                             s.features = s.genes,
                             g2m.features = g2m.genes)
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), group.by = "FB.info", 
    ncol = 2, pt.size = 0.1)

GEX.seur.cc <- GEX.seur

GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)

GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')

Markers and Clusters

Normalizing

GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)

# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2

head(VariableFeatures(GEX.seur), 200)
##   [1] "Mgat4c"        "Zfp804a"       "Wdr17"         "Cntn5"        
##   [5] "Grm5"          "Klhl1"         "Nmu"           "Adgrg6"       
##   [9] "Gm32647"       "Gm29521"       "Gm30613"       "Zfp804b"      
##  [13] "Brinp3"        "Robo2"         "Cpne4"         "Ntng1"        
##  [17] "Nrxn3"         "4930432L08Rik" "Col24a1"       "Tmeff2"       
##  [21] "Kcnb2"         "Cdh8"          "Pdzrn4"        "Ano2"         
##  [25] "Myh11"         "Cemip"         "Gm21847"       "Cntnap2"      
##  [29] "Lingo2"        "Dgkg"          "Cdh18"         "Ebf1"         
##  [33] "Ntrk3"         "Gpr149"        "Sgcz"          "Cdh19"        
##  [37] "Cdh9"          "Gal"           "Astn2"         "Kcnip4"       
##  [41] "4930509J09Rik" "Unc5d"         "Car10"         "Prkg1"        
##  [45] "Vwc2l"         "Nxph2"         "March1"        "Rbpms"        
##  [49] "Dgki"          "Gm38405"       "Nrg1"          "Dcc"          
##  [53] "Gm49938"       "Gm20754"       "1700111E14Rik" "Gm15261"      
##  [57] "Ano1"          "Schip1"        "8030451A03Rik" "Gpc5"         
##  [61] "Grin3a"        "Cmah"          "Robo1"         "Loxl2"        
##  [65] "Myl1"          "Itgb6"         "Clstn2"        "Pcdh9"        
##  [69] "Gm31045"       "Rasgef1b"      "Nkain3"        "Ccbe1"        
##  [73] "Pbx3"          "Sema5a"        "Plxna4"        "Mecom"        
##  [77] "Lpp"           "AC150683.1"    "Sst"           "Rab27b"       
##  [81] "Piezo2"        "Asic2"         "Meis2"         "Pde1a"        
##  [85] "Dpp10"         "Pcdh11x"       "Pcdh10"        "Cysltr2"      
##  [89] "Skap1"         "Zfhx3"         "9530059O14Rik" "Bmpr1b"       
##  [93] "Myo3b"         "Csmd1"         "Ghr"           "Egfem1"       
##  [97] "Rerg"          "Areg"          "Kctd16"        "Cdh6"         
## [101] "Kcnh7"         "Casp8"         "Calcb"         "Col8a1"       
## [105] "Gm15680"       "Apol7e"        "Efr3a"         "Gm30382"      
## [109] "Vip"           "Flt1"          "Gm26632"       "Actg2"        
## [113] "Chrna9"        "Tafa2"         "Ttc29"         "Cntn4"        
## [117] "Cacna2d3"      "Csmd3"         "Gm16685"       "Chsy3"        
## [121] "Spock3"        "Prkcq"         "Myh3"          "Fut9"         
## [125] "Abi3bp"        "Luzp2"         "Tnr"           "Ptk7"         
## [129] "Gm34544"       "Kcnq5"         "9530026P05Rik" "Fbxl7"        
## [133] "Abca9"         "Plcl1"         "1700034P13Rik" "Gm20713"      
## [137] "Trps1"         "Trhde"         "Gna14"         "Svep1"        
## [141] "Il1rapl1"      "Serpini1"      "Flrt2"         "Col25a1"      
## [145] "Wee2"          "Kif26b"        "Nxph1"         "Galnt18"      
## [149] "Nell1"         "Gm15584"       "Pde4d"         "Gm49953"      
## [153] "A330008L17Rik" "Cpa6"          "E230025N22Rik" "Fmo2"         
## [157] "Gm30094"       "5530401A14Rik" "4930422I22Rik" "Penk"         
## [161] "Spag16"        "Adgrd1"        "Galnt13"       "Chrm3"        
## [165] "Gpm6a"         "Opcml"         "Kctd8"         "4930587E11Rik"
## [169] "8030451O07Rik" "Itga1"         "Shisa9"        "Gm49678"      
## [173] "Fgf14"         "Gm26737"       "Rasgrf1"       "Gm11342"      
## [177] "Hs3st4"        "Tmem79"        "Prune2"        "Iqgap2"       
## [181] "Tenm4"         "Gm37267"       "Tacr1"         "Cntn3"        
## [185] "D030068K23Rik" "Hs6st3"        "Gm21798"       "Scd3"         
## [189] "Etl4"          "Zfp286os"      "Gm11961"       "Rad51b"       
## [193] "Tac1"          "Cartpt"        "Gm40841"       "Aff2"         
## [197] "Arhgap6"       "Ano5"          "Atpaf2"        "Slc4a4"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix

PCA

# exclude MT genes  and more 

#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)

DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AA|^AW|^BC|^Gm|^Hist|Rik$|-ps",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))


VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,
                                                  DIG, 
                                                  CC_gene) )

GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1 
## Positive:  Ntrk3, Tmeff2, Robo2, Ano2, Cdh8, Cpne4, Nrxn3, Myl1, Mgat4c, Clstn2 
##     Dgkg, Adgrg6, Pdzrn4, Gpr149, Grin3a, Astn2, Plxna4, Cntn5, Sgcz, Pcdh10 
##     Spock3, Cacna2d3, Kif26b, Cysltr2, Zfp804a, Cntnap2, Cux2, Cdh6, Iqgap2, Itgb6 
## Negative:  Lrrtm4, Galntl6, Ndst4, Tox, Epha5, Kcnd2, Grik1, Kcnab1, Pcdh7, Tshz2 
##     Bnc2, Nos1, Chrna7, Zfp536, Lrp1b, Cdc14a, Lrrc4c, Synpo2, Galnt17, Specc1 
##     Dach1, Syt6, Rspo2, Fat3, Tenm3, Plcxd3, Cadps2, Gfra1, Lrrc7, Adgrl3 
## PC_ 2 
## Positive:  Auts2, Nos1, Egfem1, Kcnq5, Etv1, Cadps2, Dgkb, Asic2, Schip1, Gfra1 
##     Cmah, Rgs6, Epha5, Kcnab1, Kcnt2, Alcam, Dach1, Stxbp6, Il1rapl1, Adgrl3 
##     Tmem108, Lrrc4c, Creb5, Ebf1, Grid1, Pde4d, Cdh11, Gria3, Hs6st3, Pde1a 
## Negative:  Rbfox1, Ptprt, Bnc2, Tshz2, Tafa1, Grik1, Gpc6, Frmd4b, Tox, Brinp2 
##     Fbxw15, St6galnac3, Tmem132c, Cdc14a, Tcf7l2, Caln1, Pcdh7, Plcxd3, Fbxw24, Pld5 
##     Adamtsl1, Sdk2, Slc35f4, Specc1, Oprk1, Dock2, Nfia, Unc5c, Stxbp5l, Arhgap24 
## PC_ 3 
## Positive:  Cdh18, Kcnip4, Klhl1, Pbx3, Kctd8, Csmd3, Gpc6, Htr4, Meis1, Cntn3 
##     Gabrg3, Dlc1, Skap1, Csmd1, C79798, Stxbp5l, March1, Cadm2, Stac, Unc5c 
##     Zbbx, Cdh9, Vwc2l, Sema5a, Serpini1, Epb41l4a, Dmd, Car10, Unc5d, Pde4d 
## Negative:  Adgrg6, Sgcd, Cysltr2, Slc4a4, Grin3a, Gpr149, Ano2, Nos1, Nmu, Itgb6 
##     Dapk2, Dgkg, Ccbe1, Nfia, Rgs6, Lhfpl2, Cpne4, Zfp804a, Otof, Cbln2 
##     Zfp536, Iqgap2, Kcnab1, Efr3a, Gfra1, Ngfr, Smad6, Syt15, Scube1, Adamts14 
## PC_ 4 
## Positive:  Klhl1, Vwc2l, March1, Sema5a, Cdh9, Rasgef1b, Pbx3, Zfhx3, Galnt18, Prune2 
##     Kcnh7, Il1rapl1, Zbbx, Mgat4c, Alk, Lncbate1, C79798, Grm5, Bcl2, Olfr78 
##     Dpp10, Pcdh7, Brinp3, Vcan, Auts2, Dcc, Ntm, Galnt17, Scgn, Ghr 
## Negative:  Dock10, Lingo2, Kcnip4, Csmd3, Sorcs1, Ndst4, Nxph1, Cntn5, Lrp1b, Gda 
##     Cntn3, Nyap2, Sgcz, Thsd4, Robo1, Kctd8, Lrrc7, Tac1, Pld5, Lrrtm4 
##     Tenm2, Nox4, Abca5, Epha5, Kcnq5, Sorcs3, Mctp1, Cpne8, Plcb1, Ryr3 
## PC_ 5 
## Positive:  Ebf1, Ntng1, Trhde, Trps1, Cntn4, Csmd1, Cpa6, Zmat4, Nrg1, Col18a1 
##     Kcnd2, Gna14, Chsy3, Ccdc85a, Ptprd, Myo16, Myo1e, Tmtc2, Sctr, Nkain3 
##     Npas3, Shisa6, Plcxd3, Tenm4, Fbn2, Prkd1, Rmst, Ptprt, Sdk1, Luzp2 
## Negative:  Kcnt2, Prkg1, Dgkb, Thsd7b, Alcam, Car10, Cdh20, Epha5, Vcan, Mgat4c 
##     Dmd, Galntl6, Lrrc4c, Rasgef1b, Vwc2l, Khdrbs2, Cdh9, Gabrg3, Gucy1a2, Kctd8 
##     Man1a, Rgs6, Plcl1, Sema5a, Lingo2, Dpp10, Antxr2, Ptprz1, Nmur2, Htr4
length(setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG,CC_gene)))
## [1] 1074
setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,DIG,CC_gene))[1:300]
##   [1] "Mgat4c"    "Zfp804a"   "Wdr17"     "Cntn5"     "Grm5"      "Klhl1"    
##   [7] "Nmu"       "Adgrg6"    "Zfp804b"   "Brinp3"    "Robo2"     "Cpne4"    
##  [13] "Ntng1"     "Nrxn3"     "Col24a1"   "Tmeff2"    "Kcnb2"     "Cdh8"     
##  [19] "Pdzrn4"    "Ano2"      "Myh11"     "Cemip"     "Cntnap2"   "Lingo2"   
##  [25] "Dgkg"      "Cdh18"     "Ebf1"      "Ntrk3"     "Gpr149"    "Sgcz"     
##  [31] "Cdh19"     "Cdh9"      "Gal"       "Astn2"     "Kcnip4"    "Unc5d"    
##  [37] "Car10"     "Prkg1"     "Vwc2l"     "Nxph2"     "March1"    "Rbpms"    
##  [43] "Dgki"      "Nrg1"      "Dcc"       "Ano1"      "Schip1"    "Gpc5"     
##  [49] "Grin3a"    "Cmah"      "Robo1"     "Loxl2"     "Myl1"      "Itgb6"    
##  [55] "Clstn2"    "Pcdh9"     "Rasgef1b"  "Nkain3"    "Ccbe1"     "Pbx3"     
##  [61] "Sema5a"    "Plxna4"    "Mecom"     "Lpp"       "Sst"       "Rab27b"   
##  [67] "Piezo2"    "Asic2"     "Meis2"     "Pde1a"     "Dpp10"     "Pcdh11x"  
##  [73] "Pcdh10"    "Cysltr2"   "Skap1"     "Zfhx3"     "Bmpr1b"    "Myo3b"    
##  [79] "Csmd1"     "Ghr"       "Egfem1"    "Rerg"      "Areg"      "Kctd16"   
##  [85] "Cdh6"      "Kcnh7"     "Casp8"     "Calcb"     "Col8a1"    "Apol7e"   
##  [91] "Efr3a"     "Vip"       "Flt1"      "Actg2"     "Chrna9"    "Tafa2"    
##  [97] "Ttc29"     "Cntn4"     "Cacna2d3"  "Csmd3"     "Chsy3"     "Spock3"   
## [103] "Prkcq"     "Myh3"      "Fut9"      "Abi3bp"    "Luzp2"     "Tnr"      
## [109] "Ptk7"      "Kcnq5"     "Fbxl7"     "Abca9"     "Plcl1"     "Trps1"    
## [115] "Trhde"     "Gna14"     "Svep1"     "Il1rapl1"  "Serpini1"  "Flrt2"    
## [121] "Col25a1"   "Wee2"      "Kif26b"    "Nxph1"     "Galnt18"   "Nell1"    
## [127] "Pde4d"     "Cpa6"      "Fmo2"      "Penk"      "Spag16"    "Adgrd1"   
## [133] "Galnt13"   "Chrm3"     "Gpm6a"     "Opcml"     "Kctd8"     "Itga1"    
## [139] "Shisa9"    "Fgf14"     "Rasgrf1"   "Hs3st4"    "Tmem79"    "Prune2"   
## [145] "Iqgap2"    "Tenm4"     "Tacr1"     "Cntn3"     "Hs6st3"    "Scd3"     
## [151] "Etl4"      "Zfp286os"  "Rad51b"    "Tac1"      "Cartpt"    "Aff2"     
## [157] "Arhgap6"   "Ano5"      "Atpaf2"    "Slc4a4"    "Met"       "Dgkb"     
## [163] "Nsun2"     "L3mbtl4"   "Cadm2"     "Nos1"      "Lama2"     "Gabrg3"   
## [169] "Galr1"     "Kcnd2"     "Clcnka"    "Npas3"     "Fstl4"     "Abca8a"   
## [175] "Cux2"      "Sctr"      "Galntl6"   "Lrrc43"    "Ntm"       "Grik1"    
## [181] "Olfr78"    "Parm1"     "Mir99ahg"  "Clnk"      "Gabrb1"    "Rarb"     
## [187] "Mid1"      "Il18r1"    "Pla2g10"   "Thsd7b"    "Prkch"     "Nwd1"     
## [193] "Cacnb4"    "Tcl1b4"    "Antxr2"    "Dock10"    "Iigp1"     "Synpr"    
## [199] "Foxp2"     "Acp7"      "Arhgap15"  "Dapk2"     "Muc6"      "Zbbx"     
## [205] "Cftr"      "Prr16"     "Ccdc85a"   "Plxdc2"    "Adam33"    "C3"       
## [211] "Epha7"     "Rxfp1"     "Plcb1"     "Catsperd"  "Wbp2nl"    "Ror1"     
## [217] "Pak7"      "Myocd"     "Cadps2"    "Sema3a"    "Mrc1"      "Nfatc1"   
## [223] "Zfpm2"     "Ggt5"      "Nlrp1a"    "Grm8"      "Tnni3k"    "Trp53cor1"
## [229] "Kirrel3"   "Cntnap5b"  "Nell1os"   "Aspa"      "Tex15"     "Kif28"    
## [235] "C79798"    "Esr1"      "Slc35f1"   "Adam2"     "Pde7b"     "Adam12"   
## [241] "Necab1"    "Hcn1"      "Dach1"     "Clca3a2"   "Adamts6"   "Cpn1"     
## [247] "Adamts9"   "Tenm2"     "Thsd4"     "Nox4"      "C1qtnf7"   "Ccdc3"    
## [253] "Sorcs1"    "Dlc1"      "Edil3"     "Sox2ot"    "Tafa1"     "Slc44a5"  
## [259] "Fbln1"     "Calcrl"    "Cdkn3"     "Kit"       "Slamf1"    "Mast4"    
## [265] "Dab1"      "Cpne8"     "Sulf1"     "Ephb1"     "Hpse2"     "Pgm5"     
## [271] "Rmst"      "Pde4c"     "Sorcs3"    "Rgs6"      "Col11a1"   "Fancd2"   
## [277] "Tenm1"     "Slc6a16"   "Lhfpl2"    "Otof"      "Prkca"     "Acpp"     
## [283] "Gng2"      "Myo1h"     "Kynu"      "Cmya5"     "Htr4"      "Morn5"    
## [289] "Ifi203"    "Rtl4"      "Ntrk2"     "Grp"       "Mrvi1"     "Irf5"     
## [295] "Grm7"      "Postn"     "Ptprt"     "Stk31"     "Gpc6"      "Eya1"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")

color.FB1 <- color.FB[c(1:3,5:7,10)]
GEX.seur$FB.new <- factor(as.character(GEX.seur$FB.info),
                          levels = c(paste0("CTL.",c(1:3,5)),
                                     paste0("CKO.",c(1,2,5))))
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.new", cols = color.FB1) +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.new", cols = color.FB1)

DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

decide PCs to use
ElbowPlot(GEX.seur,ndims = 50)

PCs <- 1:25
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 20)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 2.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 13190
## Number of edges: 533879
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8119
## Number of communities: 31
## Elapsed time: 1 seconds

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 145)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 21:14:20 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:14:20 Read 13190 rows and found 25 numeric columns
## 21:14:20 Using Annoy for neighbor search, n_neighbors = 20
## 21:14:20 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:14:22 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpeITGOz\file809422f73f9d
## 21:14:22 Searching Annoy index using 1 thread, search_k = 2000
## 21:14:25 Annoy recall = 100%
## 21:14:25 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 21:14:26 Initializing from normalized Laplacian + noise (using irlba)
## 21:14:27 Commencing optimization for 200 epochs, with 394126 positive edges
## 21:14:40 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))

DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.new", split.by = "FB.new", 
        ncol = 4, cols = color.FB1)

GEX.seur$cnt <- factor(gsub(".1$|.2$|.3$|.4$|.5$","",as.character(GEX.seur$FB.info)),
                       levels = c("CTL",
                                  "CKO"))
color.cnt <- color.FB[c(2,7)]
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

check short markers

markers.new.s <- list(EMN=c("Chat","Bnc2","Tox","Ptprt",
                       "Gfra2","Oprk1","Adamtsl1", 
                       "Fbxw15","Fbxw24","Chrna7",
                       "Satb1","Cntnap5b",
                       "Gabrb1","Nxph1","Lama2","Lrrc7",
                       "Ryr3","Eda","Tac1",
                       "Kctd8","Ntrk2","Penk",
                       "Fut9","Nfatc1","Egfr","Mgll",
                       "Chrm3"
                       ),
                 IMN=c("Nos1","Kcnab1",
                       "Gfra1","Etv1",
                       "Man1a","Airn",
                       "Adcy2","Cmah","Creb5","Vip","Pde1a",
                       "Ebf1","Gpc5"
                       ),
                 IN=c("Npas3","Synpr","St18","Gal",
                      "Kcnk13",
                      "Neurod6","Moxd1","Sctr",
                      "Piezo1","Sst","Adamts9","Kcnn2",
                      "Calb2"),
                 IPAN=c("Calcb","Nmu","Adgrg6","Pcdh10",
                        "Ngfr","Galr1","Il7","Aff2",
                        "Gpr149","Cdh6","Cdh8",
                        "Clstn2","Ano2","Ntrk3",
                        "Cpne4","Vwc2l","Cdh9","Scgn",
                        "Vcan","Cck","Piezo2","Kcnh7",
                        "Rerg","Bmpr1b","Skap1","Ntng1",
                        "Tafa2","Nxph2"),
                 CONTAM=c("Actl6b","Phox2b","Sox10","Plp1",
                          "Gfap","Rxrg","Pdgfra","Acta2"))

#
pm.All_new.s <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.s)), group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="All")
pm.All_new.s

#
pm.CTL_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CTL")), features = as.vector(unlist(markers.new.s)), group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CTL only")
pm.CTL_new.s

#
pm.CKO_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CKO")), features = as.vector(unlist(markers.new.s)), group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CKO only")
pm.CKO_new.s

DoubletFinder

library(DoubletFinder)
for(i in 1:length(sweep.res.list)){
  if(length(sweep.res.list[[i]]$pANN[is.nan(sweep.res.list[[i]]$pANN)]) !=0){
    if(i!=1){
      sweep.res.list[[i]] <- sweep.res.list[[i-1]]
    }else(
      sweep.res.list[[i]] <- sweep.res.list[[i+1]]
    )
  }
}
sweep.stats <- summarizeSweep(sweep.res.list, GT=FALSE)
bcmvn <- find.pK(sweep.stats)

## NULL
pk_v <- as.numeric(as.character(bcmvn$pK))
pk_good <- pk_v[bcmvn$BCmetric==max(bcmvn$BCmetric)]
# specify expected doublet number     
nExp_poi <- round(0.05*length(colnames(GEX.seur)))

GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good, 
                             nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 4397 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.05"
# specify expected doublet number     
nExp_poi <- round(0.1*length(colnames(GEX.seur)))

GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good, 
                             nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 4397 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.1"
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.05") +
  DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.1")

sort_clusters

GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
                                 levels = c(0,2,19,15,8, 4, 3, 16, 22,         # EMN
                                            1, 5,11, 6, 9,             # IMN
                                            12, 18, 27,             # IN
                                            10,7, 13,17, 21, 20,30,           # IPAN
                                            24, 23, 14, 28, 29,   # Mix
                                            25,     # Glia
                                            26      # SMC
                                            ))

# preAnno
GEX.seur$preAnno <- as.character(GEX.seur$seurat_clusters)

GEX.seur$preAnno[GEX.seur$preAnno %in% c(0,2,19,15,8)] <- "EMN1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(4)] <- "EMN2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(3)] <- "EMN3"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(16)] <- "EMN4"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(22)] <- "EMN5"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(1)] <- "IMN1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(5,11)] <- "IMN2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(6)] <- "IMN3"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(9)] <- "IMN4"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(12)] <- "IN1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(18)] <- "IN2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(27)] <- "IN3"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(10,7)] <- "IPAN1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(13,17)] <- "IPAN2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(21)] <- "IPAN3"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(20,30)] <- "IPAN4"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(24, 23, 14, 28, 29)] <- "MIX"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(25)] <- "Glia"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(26)] <- "SMC"

GEX.seur$preAnno <- factor(GEX.seur$preAnno,
                            levels = c(paste0("EMN",1:5),
                                       paste0("IMN",1:4),
                                       paste0("IN",1:3),
                                       paste0("IPAN",1:4),
                                       "MIX","Glia","SMC"))
markers.new.s <- list(EMN=c("Chat","Bnc2","Tox","Ptprt",
                       "Gfra2","Oprk1","Adamtsl1", 
                       "Fbxw15","Fbxw24","Chrna7",
                       "Satb1","Cntnap5b",
                       "Gabrb1","Nxph1","Lama2","Lrrc7",
                       "Ryr3","Eda","Tac1",
                       "Kctd8","Ntrk2","Penk",
                       "Fut9","Nfatc1","Egfr","Mgll",
                       "Chrm3"
                       ),
                 IMN=c("Nos1","Kcnab1",
                       "Gfra1","Etv1",
                       "Man1a","Airn",
                       "Adcy2","Cmah","Creb5","Vip","Pde1a",
                       "Ebf1","Gpc5"
                       ),
                 IN=c("Npas3","Synpr","St18","Gal",
                      "Kcnk13",
                      "Neurod6","Moxd1","Sctr",
                      "Piezo1","Sst","Adamts9","Kcnn2",
                      "Calb2"),
                 IPAN=c("Calcb","Nmu","Adgrg6","Pcdh10",
                        "Ngfr","Galr1","Il7","Aff2",
                        "Gpr149","Cdh6","Cdh8",
                        "Clstn2","Ano2","Ntrk3",
                        "Cpne4","Vwc2l","Cdh9","Scgn",
                        "Vcan","Cck","Piezo2","Kcnh7",
                        "Rerg","Bmpr1b","Skap1","Ntng1",
                        "Tafa2","Nxph2"),
                 CONTAM=c("Actl6b","Phox2b","Sox10","Plp1",
                          "Gfap","Rxrg","Pdgfra","Acta2"))

#
pm.All_new.s <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.s)), group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="All")
pm.All_new.s

#
pm.CTL_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CTL")), features = as.vector(unlist(markers.new.s)), group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CTL only")
pm.CTL_new.s

#
pm.CKO_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CKO")), features = as.vector(unlist(markers.new.s)), group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CKO only")
pm.CKO_new.s

#
pm.All_new.s <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.s)), group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="All")
pm.All_new.s

#
pm.CTL_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CTL")), features = as.vector(unlist(markers.new.s)), group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CTL only")
pm.CTL_new.s

#
pm.CKO_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CKO")), features = as.vector(unlist(markers.new.s)), group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CKO only")
pm.CKO_new.s

DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

#saveRDS(GEX.seur,"GEX0207.seur.pre.rds")

pure

filtering

neur.clusters <- grep("EMN|IMN|IN|IPAN",levels(GEX.seur$preAnno), value = T)
neur.clusters
##  [1] "EMN1"  "EMN2"  "EMN3"  "EMN4"  "EMN5"  "IMN1"  "IMN2"  "IMN3"  "IMN4" 
## [10] "IN1"   "IN2"   "IN3"   "IPAN1" "IPAN2" "IPAN3" "IPAN4"
GEX.seur <- subset(GEX.seur, subset = preAnno %in% neur.clusters & DoubletFinder0.1 == "Singlet")
GEX.seur
## An object of class Seurat 
## 24175 features across 11385 samples within 1 assay 
## Active assay: RNA (24175 features, 1074 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

#
pm.All_new.s <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.s)), group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="All")
pm.All_new.s

#
pm.CTL_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CTL")), features = as.vector(unlist(markers.new.s)), group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CTL only")
pm.CTL_new.s

#
pm.CKO_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CKO")), features = as.vector(unlist(markers.new.s)), group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CKO only")
pm.CKO_new.s

re-clustering

GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)

# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2

head(VariableFeatures(GEX.seur), 200)
##   [1] "Mgat4c"        "Zfp804a"       "Cntn5"         "Grm5"         
##   [5] "Klhl1"         "Nmu"           "Adgrg6"        "Gm32647"      
##   [9] "Gm29521"       "Gm30613"       "Brinp3"        "Robo2"        
##  [13] "Cpne4"         "Nrxn3"         "Kcnb2"         "Ntng1"        
##  [17] "Zfp804b"       "Tmeff2"        "Pdzrn4"        "4930432L08Rik"
##  [21] "Cdh8"          "Cntnap2"       "Col24a1"       "Lingo2"       
##  [25] "Ano2"          "Cdh18"         "Dgkg"          "Gm21847"      
##  [29] "Sgcz"          "Ntrk3"         "Gpr149"        "Kcnip4"       
##  [33] "Ebf1"          "Cdh9"          "Prkg1"         "Astn2"        
##  [37] "Dgki"          "Car10"         "Unc5d"         "4930509J09Rik"
##  [41] "March1"        "1700111E14Rik" "Nxph2"         "Nrg1"         
##  [45] "Schip1"        "Vwc2l"         "Gm38405"       "Gal"          
##  [49] "Gm15261"       "Gm20754"       "Pcdh9"         "Loxl2"        
##  [53] "Dcc"           "Gpc5"          "Cmah"          "Gm49938"      
##  [57] "Robo1"         "Clstn2"        "Myl1"          "Itgb6"        
##  [61] "Nkain3"        "Ccbe1"         "Grin3a"        "Sema5a"       
##  [65] "Rasgef1b"      "Pbx3"          "Plxna4"        "Asic2"        
##  [69] "Gm31045"       "Piezo2"        "Pde1a"         "Rab27b"       
##  [73] "Pcdh11x"       "9530059O14Rik" "Csmd1"         "Dpp10"        
##  [77] "Col8a1"        "Zfhx3"         "Bmpr1b"        "Pcdh10"       
##  [81] "Rerg"          "Skap1"         "Egfem1"        "Kctd16"       
##  [85] "Cysltr2"       "Gm30382"       "Sst"           "Ghr"          
##  [89] "Cdh6"          "AC150683.1"    "Efr3a"         "Casp8"        
##  [93] "Chsy3"         "Csmd3"         "Vip"           "Kcnh7"        
##  [97] "Calcb"         "Gm16685"       "Cacna2d3"      "Spock3"       
## [101] "Kcnq5"         "Trps1"         "Ttc29"         "Ptk7"         
## [105] "9530026P05Rik" "Plcl1"         "Cntn4"         "Tafa2"        
## [109] "Il1rapl1"      "Luzp2"         "1700034P13Rik" "Tnr"          
## [113] "Chrna9"        "Spag16"        "Gm15680"       "Fbxl7"        
## [117] "Fut9"          "Flrt2"         "Gna14"         "Abi3bp"       
## [121] "Col25a1"       "Serpini1"      "Gm34544"       "Pde4d"        
## [125] "Galnt13"       "Trhde"         "Gm20713"       "Nell1"        
## [129] "Nxph1"         "Gm15584"       "Fgf14"         "Galnt18"      
## [133] "Kif26b"        "Gm49953"       "Cpa6"          "Myo3b"        
## [137] "E230025N22Rik" "Cemip"         "Kctd8"         "4930422I22Rik"
## [141] "5530401A14Rik" "Abca9"         "Shisa9"        "Wee2"         
## [145] "Penk"          "Dgkb"          "A330008L17Rik" "4930587E11Rik"
## [149] "Hs6st3"        "Chrm3"         "8030451O07Rik" "Prune2"       
## [153] "Gm11342"       "Opcml"         "Gm26737"       "4930471E19Rik"
## [157] "Rasgrf1"       "Cadm2"         "Gm49678"       "Iqgap2"       
## [161] "Cntn3"         "Nos1"          "L3mbtl4"       "Kcnd2"        
## [165] "Hs3st4"        "Slc4a4"        "Gabrg3"        "Ano5"         
## [169] "Tenm4"         "Atpaf2"        "D030068K23Rik" "B230323A14Rik"
## [173] "Gm40841"       "Gm37267"       "Galntl6"       "Pla2g10"      
## [177] "Apol7e"        "Cenpf"         "Aff2"          "Gm30094"      
## [181] "Galr1"         "Tac1"          "Arhgap15"      "Cartpt"       
## [185] "Nsun2"         "Parm1"         "Gm20560"       "Rarb"         
## [189] "Arhgap6"       "Cux2"          "Grik1"         "Clnk"         
## [193] "Olfr78"        "A230006K03Rik" "Antxr2"        "4930445B16Rik"
## [197] "Rad51b"        "Tmem79"        "Dock10"        "Met"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix

PCA

# exclude MT genes  and more 

#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)

DIG <- grep("^mt-|^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AA|^AW|^BC|^Gm|^Hist|Rik$|-ps",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))


VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
                                                c(#MT_gene,
                                                  DIG, 
                                                  CC_gene) )

GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1 
## Positive:  Lrrtm4, Galntl6, Ndst4, Tox, Kcnd2, Grik1, Epha5, Pcdh7, Bnc2, Kcnab1 
##     Tshz2, Chrna7, Lrp1b, Cdc14a, Zfp536, Nos1, Lrrc4c, Galnt17, Synpo2, Specc1 
##     Syt6, Rspo2, Dach1, Tenm3, Fat3, Plcxd3, Lrrc7, Brinp2, Fbxw15, Cadps2 
## Negative:  Ntrk3, Tmeff2, Robo2, Ano2, Cdh8, Cpne4, Nrxn3, Mgat4c, Myl1, Clstn2 
##     Pdzrn4, Adgrg6, Dgkg, Gpr149, Grin3a, Astn2, Cntn5, Plxna4, Cacna2d3, Pcdh10 
##     Sgcz, Spock3, Kif26b, Zfp804a, Cysltr2, Iqgap2, Cdh6, Cux2, Cntnap2, Itgb6 
## PC_ 2 
## Positive:  Rbfox1, Ptprt, Bnc2, Tafa1, Tshz2, Grik1, Gpc6, Frmd4b, Brinp2, Tox 
##     Fbxw15, St6galnac3, Tmem132c, Cdc14a, Tcf7l2, Pcdh7, Caln1, Plcxd3, Fbxw24, Pld5 
##     Adamtsl1, Sdk2, Slc35f4, Nfia, Stxbp5l, Oprk1, Specc1, Unc5c, Dock2, Arhgap24 
## Negative:  Auts2, Nos1, Egfem1, Etv1, Cadps2, Kcnq5, Dgkb, Asic2, Schip1, Gfra1 
##     Rgs6, Cmah, Kcnab1, Epha5, Dach1, Kcnt2, Alcam, Stxbp6, Il1rapl1, Adgrl3 
##     Tmem108, Lrrc4c, Creb5, Gria3, Cdh11, Grid1, Ebf1, Hs6st3, Gramd1b, Pde1a 
## PC_ 3 
## Positive:  Cdh18, Kcnip4, Klhl1, Pbx3, Kctd8, Csmd3, Gpc6, Htr4, Meis1, Cntn3 
##     Dlc1, Gabrg3, Skap1, C79798, March1, Csmd1, Zbbx, Sema5a, Stxbp5l, Vwc2l 
##     Cdh9, Serpini1, Cadm2, Stac, Zfhx3, Unc5c, Epb41l4a, Pde4d, Car10, Dmd 
## Negative:  Sgcd, Adgrg6, Slc4a4, Cysltr2, Nos1, Grin3a, Gpr149, Nfia, Ano2, Nmu 
##     Itgb6, Rgs6, Dapk2, Ccbe1, Dgkg, Lhfpl2, Cpne4, Zfp536, Zfp804a, Kcnab1 
##     Otof, Gfra1, Iqgap2, Efr3a, Cbln2, Syt2, Ngfr, Scube1, Smad6, Syt15 
## PC_ 4 
## Positive:  Klhl1, March1, Vwc2l, Sema5a, Cdh9, Prune2, Kcnh7, Pbx3, Zfhx3, Rasgef1b 
##     Galnt18, Il1rapl1, Alk, Zbbx, Chsy3, Galnt17, Bcl2, Lncbate1, Pcdh7, Kcnb2 
##     Grm5, C79798, Mgat4c, Sntg1, Dcc, Olfr78, Tenm4, Auts2, Ntng1, Brinp3 
## Negative:  Dock10, Lingo2, Kcnip4, Sorcs1, Ndst4, Csmd3, Nxph1, Cntn5, Kctd8, Gda 
##     Tac1, Cntn3, Nyap2, Thsd4, Sgcz, Lrp1b, Robo1, Lrrtm4, Prkg1, Epha5 
##     Lrrc7, Dmd, Nox4, Pgm5, Tenm2, Pld5, Sorcs3, Abca5, Ryr3, Kcnq5 
## PC_ 5 
## Positive:  Dgkb, Kcnt2, Prkg1, Alcam, Thsd7b, Vwc2l, Vcan, Mgat4c, Rasgef1b, Cdh9 
##     Car10, Sema5a, Dpp10, Cdh20, Klhl1, Khdrbs2, Lrrc4c, Galntl6, Gucy1a2, Dmd 
##     Nmur2, Gabrg3, Epha5, Slc2a13, P3h2, Antxr2, Rgs6, Stard13, Hmcn1, Olfr78 
## Negative:  Trhde, Ebf1, Trps1, Ntng1, Cntn4, Csmd1, Cpa6, Nrg1, Zmat4, Ptprd 
##     Kcnd2, Col18a1, Ccdc85a, Myo1e, Gna14, Myo16, Rmst, Chsy3, Npas3, Sctr 
##     Nkain3, Shisa6, Tmtc2, Luzp2, Fbn2, Egfem1, Nav2, Plcxd3, Prkd1, Prkg2
length(setdiff(VariableFeatures(object = GEX.seur),
                                                c(#MT_gene,
                                                  DIG,CC_gene)))
## [1] 1067
setdiff(VariableFeatures(object = GEX.seur),
                                                c(#MT_gene,
                                                  DIG,CC_gene))[1:300]
##   [1] "Mgat4c"    "Zfp804a"   "Cntn5"     "Grm5"      "Klhl1"     "Nmu"      
##   [7] "Adgrg6"    "Brinp3"    "Robo2"     "Cpne4"     "Nrxn3"     "Kcnb2"    
##  [13] "Ntng1"     "Zfp804b"   "Tmeff2"    "Pdzrn4"    "Cdh8"      "Cntnap2"  
##  [19] "Col24a1"   "Lingo2"    "Ano2"      "Cdh18"     "Dgkg"      "Sgcz"     
##  [25] "Ntrk3"     "Gpr149"    "Kcnip4"    "Ebf1"      "Cdh9"      "Prkg1"    
##  [31] "Astn2"     "Dgki"      "Car10"     "Unc5d"     "March1"    "Nxph2"    
##  [37] "Nrg1"      "Schip1"    "Vwc2l"     "Gal"       "Pcdh9"     "Loxl2"    
##  [43] "Dcc"       "Gpc5"      "Cmah"      "Robo1"     "Clstn2"    "Myl1"     
##  [49] "Itgb6"     "Nkain3"    "Ccbe1"     "Grin3a"    "Sema5a"    "Rasgef1b" 
##  [55] "Pbx3"      "Plxna4"    "Asic2"     "Piezo2"    "Pde1a"     "Rab27b"   
##  [61] "Pcdh11x"   "Csmd1"     "Dpp10"     "Col8a1"    "Zfhx3"     "Bmpr1b"   
##  [67] "Pcdh10"    "Rerg"      "Skap1"     "Egfem1"    "Kctd16"    "Cysltr2"  
##  [73] "Sst"       "Ghr"       "Cdh6"      "Efr3a"     "Casp8"     "Chsy3"    
##  [79] "Csmd3"     "Vip"       "Kcnh7"     "Calcb"     "Cacna2d3"  "Spock3"   
##  [85] "Kcnq5"     "Trps1"     "Ttc29"     "Ptk7"      "Plcl1"     "Cntn4"    
##  [91] "Tafa2"     "Il1rapl1"  "Luzp2"     "Tnr"       "Chrna9"    "Spag16"   
##  [97] "Fbxl7"     "Fut9"      "Flrt2"     "Gna14"     "Abi3bp"    "Col25a1"  
## [103] "Serpini1"  "Pde4d"     "Galnt13"   "Trhde"     "Nell1"     "Nxph1"    
## [109] "Fgf14"     "Galnt18"   "Kif26b"    "Cpa6"      "Myo3b"     "Cemip"    
## [115] "Kctd8"     "Abca9"     "Shisa9"    "Wee2"      "Penk"      "Dgkb"     
## [121] "Hs6st3"    "Chrm3"     "Prune2"    "Opcml"     "Rasgrf1"   "Cadm2"    
## [127] "Iqgap2"    "Cntn3"     "Nos1"      "L3mbtl4"   "Kcnd2"     "Hs3st4"   
## [133] "Slc4a4"    "Gabrg3"    "Ano5"      "Tenm4"     "Atpaf2"    "Galntl6"  
## [139] "Pla2g10"   "Apol7e"    "Aff2"      "Galr1"     "Tac1"      "Arhgap15" 
## [145] "Cartpt"    "Nsun2"     "Parm1"     "Rarb"      "Arhgap6"   "Cux2"     
## [151] "Grik1"     "Clnk"      "Olfr78"    "Antxr2"    "Rad51b"    "Tmem79"   
## [157] "Dock10"    "Met"       "Lama2"     "Thsd7b"    "Npas3"     "Cacnb4"   
## [163] "Clca3a2"   "Adamts9"   "Mrc1"      "Myh3"      "Mid1"      "Gabrb1"   
## [169] "Sctr"      "Plxdc2"    "Necab1"    "Dapk2"     "Pak7"      "Ntm"      
## [175] "Pde7b"     "Synpr"     "Zbbx"      "Cadps2"    "Epha7"     "Ror1"     
## [181] "Ggt5"      "Etl4"      "Catsperd"  "Nfatc1"    "Tafa1"     "Kirrel3"  
## [187] "Thsd4"     "Dach1"     "Cntnap5b"  "Edil3"     "Postn"     "Plcb1"    
## [193] "Dlc1"      "Kif28"     "C1qtnf7"   "Prr16"     "Rxfp1"     "Nell1os"  
## [199] "Ccdc85a"   "Adam12"    "Irf5"      "Plin1"     "Flt1"      "Hcn1"     
## [205] "Efcab6"    "Tenm2"     "Ephb1"     "Sorcs3"    "Trp53cor1" "Adamts6"  
## [211] "Mast4"     "C79798"    "Cdkn3"     "Sorcs1"    "Calcrl"    "Col11a1"  
## [217] "Cmya5"     "Cpne8"     "Sorcs2"    "Rgs6"      "Gpc6"      "Acp7"     
## [223] "Cpn1"      "Prkca"     "Aunip"     "Gng2"      "Esr1"      "Tenm1"    
## [229] "Nwd1"      "Myo3a"     "Clec9a"    "Acpp"      "Otof"      "Htr4"     
## [235] "Slamf1"    "Slc44a5"   "Stk31"     "Bfsp2"     "Nox4"      "Lhfpl2"   
## [241] "Fbln1"     "Slc6a16"   "Grm7"      "Ntrk2"     "Bcl2"      "Tacr1"    
## [247] "Rmst"      "Dab1"      "Adam2"     "Kynu"      "Slc22a4"   "Ptprt"    
## [253] "Auts2"     "Etv1"      "Tcf7l1"    "Npl"       "Stxbp6"    "Moxd1"    
## [259] "Lrp1b"     "Klhl32"    "Gfra1"     "Kcnk2"     "Sema3e"    "Vcan"     
## [265] "Yae1d1"    "Ifi203"    "Olfr889"   "Alcam"     "Lmcd1"     "Eya1"     
## [271] "Efemp1"    "Adgrl2"    "Syt10"     "Ppp3ca"    "Pax6os1"   "Gip"      
## [277] "Olfr720"   "Nuggc"     "Mettl7a2"  "Fign"      "Prkag2"    "Tcl1b4"   
## [283] "Epas1"     "Htr2c"     "Syt9"      "Bche"      "Grp"       "Lrrc4c"   
## [289] "Tbx20"     "Clcnka"    "Lamc2"     "Tex21"     "Adcy2"     "Brinp2"   
## [295] "Tmem163"   "Myo1e"     "Itgbl1"    "Slc16a12"  "Pcdh17"    "Ngfr"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")

DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB1)

DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

decide PCs to use
ElbowPlot(GEX.seur,ndims = 50)

PCs <- 1:24
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 20)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 11385
## Number of edges: 465726
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8028
## Number of communities: 25
## Elapsed time: 1 seconds

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 145)
## 21:37:35 UMAP embedding parameters a = 0.9922 b = 1.112
## Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
## Also defined by 'spam'
## 21:37:35 Read 11385 rows and found 24 numeric columns
## 21:37:35 Using Annoy for neighbor search, n_neighbors = 20
## Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
## Also defined by 'spam'
## 21:37:35 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:37:37 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpeITGOz\file809420556d81
## 21:37:37 Searching Annoy index using 1 thread, search_k = 2000
## 21:37:39 Annoy recall = 100%
## 21:37:40 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 21:37:41 Initializing from normalized Laplacian + noise (using irlba)
## 21:37:41 Commencing optimization for 200 epochs, with 337322 positive edges
## 21:37:53 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

GEX.seur <- subset(GEX.seur, subset = seurat_clusters %in% 0:22)
GEX.seur
## An object of class Seurat 
## 24175 features across 11307 samples within 1 assay 
## Active assay: RNA (24175 features, 1067 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

newAnno

GEX.seur$newAnno <- as.character(GEX.seur$seurat_clusters)

GEX.seur$newAnno[GEX.seur$newAnno %in% c(0,17,1,15,6)] <- "EMN1"
GEX.seur$newAnno[GEX.seur$newAnno %in% c(5)] <- "EMN2"
GEX.seur$newAnno[GEX.seur$newAnno %in% c(8,9)] <- "EMN3"
GEX.seur$newAnno[GEX.seur$newAnno %in% c(12)] <- "EMN4"

GEX.seur$newAnno[GEX.seur$newAnno %in% c(2,10,3)] <- "IMN1"
GEX.seur$newAnno[GEX.seur$newAnno %in% c(4)] <- "IMN2"
GEX.seur$newAnno[GEX.seur$newAnno %in% c(11)] <- "IMN3"

GEX.seur$newAnno[GEX.seur$newAnno %in% c(13)] <- "IN1"
GEX.seur$newAnno[GEX.seur$newAnno %in% c(21)] <- "IN2"
GEX.seur$newAnno[GEX.seur$newAnno %in% c(22)] <- "IN3"

GEX.seur$newAnno[GEX.seur$newAnno %in% c(7,16)] <- "IPAN1"
GEX.seur$newAnno[GEX.seur$newAnno %in% c(14,20)] <- "IPAN2"
GEX.seur$newAnno[GEX.seur$newAnno %in% c(18)] <- "IPAN3"
GEX.seur$newAnno[GEX.seur$newAnno %in% c(19)] <- "IPAN4"

GEX.seur$newAnno <- factor(GEX.seur$newAnno,
                           levels = c(paste0("EMN",1:4),
                                      paste0("IMN",1:3),
                                      paste0("IN",1:3),
                                      paste0("IPAN",1:4)))
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "newAnno") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))

#
pm.All_new.s <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.s)), group.by = "newAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="All")
pm.All_new.s

#
pm.CTL_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CTL")), features = as.vector(unlist(markers.new.s)), group.by = "newAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CTL only")
pm.CTL_new.s

#
pm.CKO_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CKO")), features = as.vector(unlist(markers.new.s)), group.by = "newAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CKO only")
pm.CKO_new.s

composition

GEX.seur$rep <- paste0("rep",
                        gsub("CTL.|CKO.","",as.character(GEX.seur$FB.info)))
DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.new", split.by = "FB.new", 
        ncol = 4, cols = color.FB1)

heatmap.1

cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.new,
      clusters=GEX.seur$newAnno),
                   main = "Cell Count",
                   gaps_row = c(4),
      gaps_col = c(4,7,10),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.new,
                                clusters=GEX.seur$newAnno)),
                   main = "Cell Ratio",
                   gaps_row = c(4),
      gaps_col = c(4,7,10),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

barplot.1

stat_newAnno <- GEX.seur@meta.data[,c("cnt","rep","newAnno")]

stat_newAnno.s <- stat_newAnno %>%
  group_by(cnt,rep,newAnno) %>%
  summarise(count=n()) %>%
  mutate(prop= count/sum(count)) %>%
  ungroup
## `summarise()` has grouped output by 'cnt', 'rep'. You can override using the
## `.groups` argument.
pcom.newAnno <- stat_newAnno.s %>%
  ggplot(aes(x = newAnno, y = 100*prop, color=cnt)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  theme_classic(base_size = 15) +
  scale_color_manual(values = c("skyblue","pink"), name="") +
  labs(title="Cell Composition of SI data, newAnno", y = "Proportion") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) +
  
  geom_point(aes(x=newAnno, y = 100*prop, color= cnt),
             position = position_dodge(0.75),
             shape=16,alpha=0.75,size=2.15,
             stroke=0.15, show.legend=F)

pcom.newAnno

sig.test

glm.nb - anova.LRT

Sort.N <- c("CTL","CKO")

glm.nb_anova.newAnno <- list()

for(x1 in 1:2){
  for(x2 in 1:2){
    if(x2>x1){
      
      stat_newAnno.s_N <- stat_newAnno.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
      
      
      total.N <- stat_newAnno.s_N %>%
        group_by(cnt, rep) %>%
        summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
      
      rownames(total.N) <- paste0(as.character(total.N$cnt),
                                  "_",
                                  as.character(total.N$rep))
      
      stat_newAnno.s_N$total <- total.N[paste0(stat_newAnno.s_N$cnt,
                                            "_",
                                            stat_newAnno.s_N$rep),"total"]
      
      glm.nb_anova.newAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
      
      for(AA in levels(GEX.seur$newAnno)){
        glm.nb_anova.newAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
          aaa <- stat_newAnno.s_N %>% filter(newAnno==AA)
          aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
          aaa.anova <- anova(aaa.glmnb, test = "LRT")
          aaa.anova$'Pr(>Chi)'[2]
        }, error=function(e){
        
          tryCatch({
            aaa <- stat_newAnno.s_N %>% filter(newAnno==AA)
            aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
            aaa.anova <- anova(aaa.glmnb, test = "LRT")
            aaa.anova$'Pr(>Chi)'[2]
          }, error=function(e){
            
            tryCatch({
              aaa <- stat_newAnno.s_N %>% filter(newAnno==AA)
              aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
              aaa.anova <- anova(aaa.glmnb, test = "LRT")
              aaa.anova$'Pr(>Chi)'[2]
            }, error = function(e){
              NA
            })     
          })
        })
      }
      glm.nb_anova.newAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.newAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]]) 
    }
  }
}
glm.nb_anova.newAnno_df <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.newAnno)))
rownames(glm.nb_anova.newAnno_df) <- names(glm.nb_anova.newAnno)
colnames(glm.nb_anova.newAnno_df) <- gsub("X","C",colnames(glm.nb_anova.newAnno_df))
glm.nb_anova.newAnno_df
##                EMN1      EMN2     EMN3      EMN4      IMN1      IMN2      IMN3
## CTLvsCKO 0.01670205 0.8044634 0.871329 0.1845469 0.1125405 0.2643153 0.9863427
##                IN1       IN2       IN3     IPAN1     IPAN2     IPAN3     IPAN4
## CTLvsCKO 0.4241626 0.5276967 0.5222546 0.6445882 0.3799135 0.6506602 0.7076578
round(glm.nb_anova.newAnno_df,4)
##            EMN1   EMN2   EMN3   EMN4   IMN1   IMN2   IMN3    IN1    IN2    IN3
## CTLvsCKO 0.0167 0.8045 0.8713 0.1845 0.1125 0.2643 0.9863 0.4242 0.5277 0.5223
##           IPAN1  IPAN2  IPAN3  IPAN4
## CTLvsCKO 0.6446 0.3799 0.6507 0.7077
#saveRDS(GEX.seur,"Ba53Nb.Ileum_Nb5d.pure_newAnno20240208.rds")